!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
!    Math and Computer Science Division, Argonne National Laboratory   !
!-----------------------------------------------------------------------
! CVS $Id: m_SparseMatrixDecomp.F90 18 2005-12-12 17:49:42Z mvr $
! CVS $Name$ 
!BOP -------------------------------------------------------------------
!
! !MODULE: m_SparseMatrixDecomp -- Parallel sparse matrix decomposition.
!
! !DESCRIPTION:
! The {\tt SparseMatrix} datatype provides sparse matrix storage for 
! the parallel matrix-vector multiplication ${\bf y} = {\bf M} {\bf x}$.
! This module provides services to create decompositions for the 
! {\tt SparseMatrix}.  The matrix decompositions available are row 
! and column decompositions.  They are generated by invoking the 
! appropriate routine in this module, and passing the corresponding 
! {\em vector} decomposition.  For a row (column) decomposition, one 
! invokes the routine {\tt ByRow()} ({\tt ByColumn()}), passing the 
! domain decomposition for the vector {\bf y} ({\bf x}).
!
! !INTERFACE:

 module m_SparseMatrixDecomp

      private   ! except

! !PUBLIC MEMBER FUNCTIONS:
!
      public :: ByColumn
      public :: ByRow


    interface ByColumn ; module procedure &
         ByColumnGSMap_
    end interface

    interface ByRow ; module procedure &
         ByRowGSMap_
    end interface

! !REVISION HISTORY:
! 13Apr01 - J.W. Larson <larson@mcs.anl.gov> - initial prototype
!           and API specifications.
! 03Aug01 - E. Ong <eong@mcs.anl.gov> - in ByRowGSMap and ByColumnGSMap,
!           call GlobalSegMap_init on non-root processes with actual 
!           shaped arguments to satisfy Fortran 90 standard. See
!           comments in ByRowGSMap/ByColumnGSMap.
!EOP ___________________________________________________________________

  character(len=*),parameter :: myname='MCT::m_SparseMatrixDecomp'

 contains

!-------------------------------------------------------------------------
!     Math + Computer Science Division / Argonne National Laboratory     !
!-------------------------------------------------------------------------
!BOP
!
! !IROUTINE:  ByColumnGSMap_ - Generate Row-based GlobalSegMap for SparseMatrix
! 
! !INTERFACE:

 subroutine ByColumnGSMap_(xGSMap, sMat, sMGSMap, root, comm)
!
! !USES:
!
   use m_die,  only: MP_perr_die,die

   use m_List, only: List
   use m_List, only: List_init => init
   use m_List, only: List_clean => clean

   use m_AttrVect, only: AttrVect
   use m_AttrVect, only: AttrVect_init => init
   use m_AttrVect, only: AttrVect_lsize => lsize
   use m_AttrVect, only: AttrVect_indexIA => indexIA
   use m_AttrVect, only: AttrVect_copy => copy
   use m_AttrVect, only: AttrVect_clean => clean
   
   use m_AttrVectComms, only: AttrVect_scatter => scatter
   use m_AttrVectComms, only: AttrVect_gather => gather

   use m_GlobalMap, only : GlobalMap
   use m_GlobalMap, only : GlobalMap_init => init
   use m_GlobalMap, only : GlobalMap_clean => clean

   use m_GlobalSegMap, only: GlobalSegMap
   use m_GlobalSegMap, only: GlobalSegMap_init => init
   use m_GlobalSegMap, only: GlobalSegMap_peLocs => peLocs
   use m_GlobalSegMap, only: GlobalSegMap_comp_id => comp_id

   use m_SparseMatrix, only: SparseMatrix
   use m_SparseMatrix, only: SparseMatrix_lsize => lsize
   use m_SparseMatrix, only: SparseMatrix_SortPermute => SortPermute

   implicit none

! !INPUT PARAMETERS: 
!
   type(GlobalSegMap), intent(in)    :: xGSMap
   integer,            intent(in)    :: root
   integer,            intent(in)    :: comm

! !INPUT/OUTPUT PARAMETERS: 
!
   type(SparseMatrix), intent(inout) :: sMat

! !OUTPUT PARAMETERS:
!
   type(GlobalSegMap), intent(out) :: sMGSMap

! !DESCRIPTION: This routine is invoked from all processes on the 
! communicator {\tt comm} to create from an input {\tt SparseMatrix}
! {\tt sMat} (valid only on the {\tt root} process) and an input 
! {\bf x}-vector decomposition described by the {\tt GlobalSegMap} 
! argument {\tt xGSMap} (valid at least on the {\tt root}) to create 
! an output {\tt GlobalSegMap} decomposition of the matrix elements 
! {\tt sMGSMap}, which is valid on all processes on the communicator.
! This matrix {\tt GlobalSegMap} describes the corresponding column 
! decomposition of {\tt sMat}.
!
! {\bf N.B.}:  The argument {\tt sMat} is returned sorted in lexicographic 
! order by column and row.
!
! !REVISION HISTORY: 
!
! 13Apr01 - J.W. Larson <larson@mcs.anl.gov> - initial API spec.
! 26Apr01 - R.L. Jacob <jacob@mcs.anl.gov> - add use statements for
!           GlobalSegMap_init and GSMap_peLocs.
!           Add gsize argument required to GSMap_peLocs.
!           Add underscore to ComputeSegments call so it matches
!           the subroutine decleration.
!           change attribute on starts,lengths, and pe_locs to
!           pointer to match GSMap_init.
!           add use m_die statement
! 26Apr01 - J.W. Larson <larson@mcs.anl.gov> - fixed major logic bug
!           that had all processes executing some operations that 
!           should only occur on the root.
! 09Jul03 - E.T. Ong <eong@mcs.anl.gov> - call pe_locs in parallel. 
!           reduce the serial sort from gcol:grow to just gcol.
!EOP
!-------------------------------------------------------------------------

  character(len=*),parameter :: myname_=myname//'ByColumnGSMap_'
! Process ID number
  integer :: myID, mySIZE
! Attributes for the output GlobalSegMap
  integer :: gsize, comp_id, ngseg
! Temporary array for identifying each matrix element column and 
! process ID destination
  type(AttrVect) :: gcol
  type(AttrVect) :: dist_gcol
  type(AttrVect) :: element_pe_locs 
  type(AttrVect) :: dist_element_pe_locs
! Index variables for the AttrVects
  integer :: dist_gsize
  integer :: gcol_index
  integer :: element_pe_locs_index
! Temporary array for initializing GlobalMap Decomposition
  integer,dimension(:), allocatable :: counts
! GlobalMap for setting up decomposition to call pe_locs
  type(GlobalMap) :: dist_GMap
! Temporary arrays for matrix GlobalSegMap attributes
  integer, dimension(:), pointer :: starts, lengths, pe_locs
! List storage for sorting keys
  type(List) :: sort_keys
! Error flag
  integer :: ierr
! Loop index
  integer :: i

       ! Determine process id number myID

  call MPI_COMM_RANK(comm, myID, ierr)
  if(ierr /= 0) then
     call MP_perr_die(myname_,'call MPI_COMM_RANK(...',ierr)
  endif

      ! Determine the number of processors in communicator

  call MPI_COMM_SIZE(comm, mySIZE, ierr)
  if(ierr /= 0) then
     call MP_perr_die(myname_,'call MPI_COMM_SIZE(...',ierr)
  endif

       ! Allocate space for GlobalMap length information

  allocate(counts(0:mySIZE-1),stat=ierr)
  if(ierr/=0) call die(myname_,"allocate(counts)",ierr)

       ! First step:  a lot of prep work on the root only:

  if(myID == root) then

       ! Sort the matrix entries in sMat by column.  
       ! First, create the key list...

     call List_init(sort_keys,'gcol')

       ! Now perform the sort/permute...

     call SparseMatrix_SortPermute(sMat, sort_keys)

     call List_clean(sort_keys)

       ! The global size of matrix GlobalSegMap is the number nonzero
       ! elements in sMat.  

     gsize = SparseMatrix_lsize(sMat)

       ! Allocate storage space for matrix element column indices and
       ! process ID destinations

     call AttrVect_init(aV=gcol, iList="gcol", lsize=gsize)

       ! Extract global column information and place in array gCol

     call AttrVect_copy(aVin=sMat%data, aVout=gcol, iList="gcol")

       ! Setup GlobalMap decomposition lengths:

     do i=0,mySIZE-1
        counts(i) = gsize/mySIZE
     enddo
     counts(mySIZE-1) = counts(mySIZE-1) + mod(gsize,mySIZE)

  endif

       ! Initialize GlobalMap so that we can scatter the global row
       ! information. The GlobalMap will inherit the component ID
       ! from xGSMap

  comp_id = GlobalSegMap_comp_id(xGSMap)

  call GlobalMap_init(GMap=dist_GMap, comp_id=comp_id, lns=counts, &
                      root=root, comm=comm)

  call AttrVect_scatter(iV=gcol, oV=dist_gcol, GMap=dist_GMap, &
                        root=root, comm=comm)

       ! Similarly, we want to scatter the element_pe_locs using the 
       ! same decomposition
  
  dist_gsize = AttrVect_lsize(dist_gcol)

  call AttrVect_init(aV=dist_element_pe_locs, iList="element_pe_locs", &
                     lsize=dist_gsize)
     
       ! Compute process ID destination for each matrix element,
       ! and store in the AttrVect element_pe_locs

  gcol_index = AttrVect_indexIA(dist_gcol,"gcol", dieWith=myname_)
  element_pe_locs_index = AttrVect_indexIA(dist_element_pe_locs, &
                            "element_pe_locs", dieWith=myname_)

  call GlobalSegMap_peLocs(xGSMap, dist_gsize,      &
          dist_gcol%iAttr(gcol_index,1:dist_gsize), &
          dist_element_pe_locs%iAttr(element_pe_locs_index,1:dist_gsize))

  call AttrVect_gather(iV=dist_element_pe_locs, oV=element_pe_locs, &
                       GMap=dist_GMap, root=root, comm=comm)

       ! Back to the root operations

  if(myID == root) then

       ! Sanity check: Is the globalsize of sMat the same as the 
       ! gathered size of element_pe_locs?
                  
     if(gsize /= AttrVect_lsize(element_pe_locs)) then
        call die(myname_,"gsize /= AttrVect_lsize(element_pe_locs) &
                 & on root process")
     endif
       
       ! Using the entries of gCol and element_pe_locs, build the
       ! output GlobalSegMap attribute arrays starts(:), lengths(:),
       ! and pe_locs(:)

     gcol_index = AttrVect_indexIA(gcol,"gcol", dieWith=myname_)
     element_pe_locs_index = AttrVect_indexIA(element_pe_locs, &
                                "element_pe_locs", dieWith=myname_)

     call ComputeSegments_(element_pe_locs%iAttr(element_pe_locs_index, &
                                                              1:gsize), &
                           gcol%iAttr(gcol_index,1:gsize), &
                           gsize, ngseg, starts, lengths, pe_locs)
       ! Clean up on the root

     call AttrVect_clean(gcol)
     call AttrVect_clean(element_pe_locs)

  endif ! if(myID == root)

       ! Non-root processes call GlobalSegMap_init with root_start, 
       ! root_length, and root_pe_loc, although these arguments are 
       ! not used in the subroutine. Since these correspond to dummy 
       ! shaped array arguments in initr_, the Fortran 90 standard 
       ! dictates that the actual arguments must contain complete shape 
       ! information. Therefore, these array arguments must be 
       ! allocated on all processes.

  if(myID /= root) then
     allocate(starts(0),lengths(0),pe_locs(0),stat=ierr)
     if(ierr /= 0) then
        call die(myname_,'non-root allocate(starts...',ierr)
     endif
  endif

       ! Using this local data on the root, create the SparseMatrix 
       ! GlobalSegMap sMGSMap (which will be valid on all processes
       ! on the communicator:

  call GlobalSegMap_init(sMGSMap, ngseg, starts, lengths, pe_locs, &
                         root, comm, comp_id, gsize)

       ! Clean up 

  call GlobalMap_clean(dist_GMap)
  call AttrVect_clean(dist_gcol)
  call AttrVect_clean(dist_element_pe_locs)

  deallocate(starts, lengths, pe_locs, counts, stat=ierr)
  if(ierr /= 0) then
     call die(myname_,'deallocate(starts...',ierr)
  endif


 end subroutine ByColumnGSMap_

!-------------------------------------------------------------------------
!     Math + Computer Science Division / Argonne National Laboratory     !
!-------------------------------------------------------------------------
!BOP
!
! !IROUTINE:  ByRowGSMap_ - Generate Row-based GlobalSegMap for SparseMatrix
! 
! !INTERFACE:

 subroutine ByRowGSMap_(yGSMap, sMat, sMGSMap, root, comm)
!
! !USES:
!

   use m_die,  only: MP_perr_die,die

   use m_List, only: List
   use m_List, only: List_init => init
   use m_List, only: List_clean => clean

   use m_AttrVect, only: AttrVect
   use m_AttrVect, only: AttrVect_init => init
   use m_AttrVect, only: AttrVect_lsize => lsize
   use m_AttrVect, only: AttrVect_indexIA => indexIA
   use m_AttrVect, only: AttrVect_copy => copy
   use m_AttrVect, only: AttrVect_clean => clean
   
   use m_AttrVectComms, only: AttrVect_scatter => scatter
   use m_AttrVectComms, only: AttrVect_gather => gather

   use m_GlobalMap, only : GlobalMap
   use m_GlobalMap, only : GlobalMap_init => init
   use m_GlobalMap, only : GlobalMap_clean => clean

   use m_GlobalSegMap, only: GlobalSegMap
   use m_GlobalSegMap, only: GlobalSegMap_init => init
   use m_GlobalSegMap, only: GlobalSegMap_peLocs => peLocs
   use m_GlobalSegMap, only: GlobalSegMap_comp_id => comp_id

   use m_SparseMatrix, only: SparseMatrix
   use m_SparseMatrix, only: SparseMatrix_lsize => lsize
   use m_SparseMatrix, only: SparseMatrix_SortPermute => SortPermute

   implicit none

! !INPUT PARAMETERS: 
!
   type(GlobalSegMap), intent(in)    :: yGSMap
   integer,            intent(in)    :: root
   integer,            intent(in)    :: comm

! !INPUT/OUTPUT PARAMETERS: 
!
   type(SparseMatrix), intent(inout) :: sMat

! !OUTPUT PARAMETERS:
!
   type(GlobalSegMap), intent(out) :: sMGSMap

! !DESCRIPTION: This routine is invoked from all processes on the 
! communicator {\tt comm} to create from an input {\tt SparseMatrix}
! {\tt sMat} (valid only on the {\tt root} process) and an input 
! {\bf y}-vector decomposition described by the {\tt GlobalSegMap} 
! argument {\tt yGSMap} (valid at least on the {\tt root}) to create 
! an output {\tt GlobalSegMap} decomposition of the matrix elements 
! {\tt sMGSMap}, which is valid on all processes on the communicator.
! This matrix {\tt GlobalSegMap} describes the corresponding row 
! decomposition of {\tt sMat}.
!
! {\bf N.B.}:  The argument {\tt sMat} is returned sorted in lexicographic 
! order by row and column.
!
! !REVISION HISTORY: 
!
! 13Apr01 - J.W. Larson <larson@mcs.anl.gov> - initial API spec.
! 26Apr01 - R.L. Jacob <jacob@mcs.anl.gov> - add use statements for
!           GlobalSegMap_init and GSMap_peLocs.
!           Add gsize argument required to GSMap_peLocs.
!           Add underscore to ComputeSegments call so it matches
!           the subroutine decleration.
!           change attribute on starts,lengths, and pe_locs to
!            pointer to match GSMap_init.
! 26Apr01 - J.W. Larson <larson@mcs.anl.gov> - fixed major logic bug
!           that had all processes executing some operations that 
!           should only occur on the root.
! 09Jun03 - E.T. Ong <eong@mcs.anl.gov> - call peLocs in parallel.
!           reduce the serial sort from grow:gcol to just grow.
!EOP
!-------------------------------------------------------------------------

  character(len=*),parameter :: myname_=myname//'ByRowGSMap_'
! Process ID number and communicator size
  integer :: myID, mySIZE
! Attributes for the output GlobalSegMap
  integer :: gsize, comp_id, ngseg
! Temporary array for identifying each matrix element row and 
! process ID destination
  type(AttrVect) :: grow
  type(AttrVect) :: dist_grow
  type(AttrVect) :: element_pe_locs 
  type(AttrVect) :: dist_element_pe_locs
! Index variables for AttrVects
  integer :: dist_gsize
  integer :: grow_index
  integer :: element_pe_locs_index
! Temporary array for initializing GlobalMap Decomposition
  integer,dimension(:), allocatable :: counts
! GlobalMap for setting up decomposition to call pe_locs
  type(GlobalMap) :: dist_GMap
! Temporary arrays for matrix GlobalSegMap attributes
  integer, dimension(:), pointer :: starts, lengths, pe_locs
! List storage for sorting keys
  type(List) :: sort_keys
! Error flag
  integer :: ierr
! Loop index
  integer :: i

       ! Determine process id number myID

  call MPI_COMM_RANK(comm, myID, ierr)
  if(ierr /= 0) then
     call MP_perr_die(myname_,'call MPI_COMM_RANK(...',ierr)
  endif

       ! Determine the number of processors in communicator

  call MPI_COMM_SIZE(comm, mySIZE, ierr)
  if(ierr /= 0) then
     call MP_perr_die(myname_,'call MPI_COMM_SIZE(...',ierr)
  endif

       ! Allocate space for GlobalMap length information

  allocate(counts(0:mySIZE-1),stat=ierr)
  if(ierr/=0) call die(myname_,"allocate(counts)",ierr)

       ! First step:  a lot of prep work on the root only:

  if(myID == root) then

       ! Sort the matrix entries in sMat by row. 
       ! First, create the key list...

     call List_init(sort_keys,'grow')

       ! Now perform the sort/permute...

     call SparseMatrix_SortPermute(sMat, sort_keys)

     call List_clean(sort_keys)

       ! The global size of matrix GlobalSegMap is the number of rows.  

     gsize = SparseMatrix_lsize(sMat)

       ! Allocate storage space for matrix element row indices and
       ! process ID destinations

     call AttrVect_init(aV=grow, iList="grow", lsize=gsize)

       ! Extract global row information and place in AttrVect grow

     call AttrVect_copy(aVin=sMat%data, aVout=grow, iList="grow")

       ! Setup GlobalMap decomposition lengths: 
       ! Give any extra points to the last process

     do i=0,mySIZE-1
        counts(i) = gsize/mySIZE
     enddo
     counts(mySIZE-1) = counts(mySIZE-1) + mod(gsize,mySIZE)

  endif
  
       ! Initialize GlobalMap and scatter the global row information.
       ! The GlobalMap will inherit the component ID from yGSMap

  comp_id = GlobalSegMap_comp_id(yGSMap)
     
  call GlobalMap_init(GMap=dist_GMap, comp_id=comp_id, lns=counts, &
                      root=root, comm=comm)

  call AttrVect_scatter(iV=grow, oV=dist_grow, GMap=dist_GMap, &
                        root=root, comm=comm)

       ! Similarly, we want to scatter the element_pe_locs using the 
       ! same decomposition
  
  dist_gsize = AttrVect_lsize(dist_grow)

  call AttrVect_init(aV=dist_element_pe_locs, iList="element_pe_locs", &
                     lsize=dist_gsize)
     
       ! Compute process ID destination for each matrix element,
       ! and store in the AttrVect element_pe_locs

  grow_index = AttrVect_indexIA(dist_grow,"grow", dieWith=myname_)
  element_pe_locs_index = AttrVect_indexIA(dist_element_pe_locs, &
                            "element_pe_locs", dieWith=myname_)

  call GlobalSegMap_peLocs(yGSMap, dist_gsize,      &
          dist_grow%iAttr(grow_index,1:dist_gsize), &
          dist_element_pe_locs%iAttr(element_pe_locs_index,1:dist_gsize))

       ! Gather element_pe_locs on root so that we can call compute_segments

  call AttrVect_gather(iV=dist_element_pe_locs, oV=element_pe_locs, &
                       GMap=dist_GMap, root=root, comm=comm)

       ! Back to the root operations

  if(myID == root) then

       ! Sanity check: Is the globalsize of sMat the same as the 
       ! gathered size of element_pe_locs?
                  
     if(gsize /= AttrVect_lsize(element_pe_locs)) then
        call die(myname_,"gsize /= AttrVect_lsize(element_pe_locs) &
                 & on root process")
     endif

       ! Using the entries of grow and element_pe_locs, build the
       ! output GlobalSegMap attribute arrays starts(:), lengths(:),
       ! and pe_locs(:)

     grow_index = AttrVect_indexIA(grow,"grow", dieWith=myname_)
     element_pe_locs_index = AttrVect_indexIA(element_pe_locs, &
                                "element_pe_locs", dieWith=myname_)

     call ComputeSegments_(element_pe_locs%iAttr(element_pe_locs_index, &
                                                              1:gsize), &
                           grow%iAttr(grow_index,1:gsize), &
                           gsize, ngseg, starts, lengths, pe_locs)

       ! Clean up on the root

     call AttrVect_clean(grow)
     call AttrVect_clean(element_pe_locs)

  endif ! if(myID == root)

       ! Non-root processes call GlobalSegMap_init with root_start, 
       ! root_length, and root_pe_loc, although these arguments are 
       ! not used in the subroutine. Since these correspond to dummy 
       ! shaped array arguments in initr_, the Fortran 90 standard 
       ! dictates that the actual arguments must contain complete shape 
       ! information. Therefore, these array arguments must be 
       ! allocated on all processes.

  if(myID /= root) then
     allocate(starts(0),lengths(0),pe_locs(0),stat=ierr)
     if(ierr /= 0) then
	call die(myname_,'non-root allocate(starts...',ierr)
     endif
  endif

       ! Using this local data on the root, create the SparseMatrix 
       ! GlobalSegMap sMGSMap (which will be valid on all processes
       ! on the communicator. The GlobalSegMap will inherit the 
       ! component ID from yGSMap

  call GlobalSegMap_init(sMGSMap, ngseg, starts, lengths, pe_locs, &
                         root, comm, comp_id, gsize)

       ! Clean up:

  call GlobalMap_clean(dist_GMap)
  call AttrVect_clean(dist_grow)
  call AttrVect_clean(dist_element_pe_locs)

  deallocate(starts, lengths, pe_locs, counts, stat=ierr)
  if(ierr /= 0) then
     call die(myname_,'deallocate(starts...',ierr)
  endif


 end subroutine ByRowGSMap_

!-------------------------------------------------------------------------
!     Math + Computer Science Division / Argonne National Laboratory     !
!-------------------------------------------------------------------------
!BOP
!
! !IROUTINE:  ComputeSegments_ - Create segments from list data.
! 
! !INTERFACE:

 subroutine ComputeSegments_(element_pe_locs, elements, num_elements, &
                             nsegs, seg_starts, seg_lengths, seg_pe_locs)
!
! !USES:
!

   use m_die,  only: die

   implicit none

! !INPUT PARAMETERS: 
!
   integer, dimension(:), intent(in)  :: element_pe_locs
   integer, dimension(:), intent(in)  :: elements
   integer,               intent(in)  :: num_elements

! !OUTPUT PARAMETERS:
!
   integer,               intent(out) :: nsegs
   integer, dimension(:), pointer     :: seg_starts
   integer, dimension(:), pointer     :: seg_lengths
   integer, dimension(:), pointer     :: seg_pe_locs

! !DESCRIPTION: This routine examins an input list of {\tt num\_elements} 
! process ID locations stored in the array {\tt element\_pe\_locs}, counts
! the number of contiguous segments {\tt nsegs}, and returns the segment 
! start index, length, and process ID location in the arrays {\tt seg\_starts(:)},
! {\tt seg\_lengths(:)}, and {\tt seg\_pe\_locs(:)}, respectively.
!
! {\bf N.B.}:  The argument {\tt sMat} is returned sorted in lexicographic 
! order by row and column.
!
! !REVISION HISTORY: 
!
! 18Apr01 - J.W. Larson <larson@mcs.anl.gov> - initial version.
! 28Aug01 - M.J. Zavislak <zavislak@mcs.anl.gov>
!               Changed first sanity check to get size(element_pe_locs)
!               instead of size(elements)
!EOP
!-------------------------------------------------------------------------
  character(len=*),parameter :: myname_=myname//'ComputeSegments_'

  integer :: i, ierr, iseg

       ! Input argument sanity checks:

  if(size(element_pe_locs) < num_elements) then
     call die(myname_,'input argument array element_pe_locs too small', &
	  num_elements-size(element_pe_locs))
  endif

  if(size(elements) < num_elements) then
     call die(myname_,'input argument array elements too small', &
	  num_elements-size(elements))
  endif

       ! First pass:  how many segments?

  do i=1,num_elements

     if(i == 1) then ! bootstrap segment count

	nsegs = 1

     else ! usual point/segment processing

       ! New segment?  If so, increment nsegs.

	if((elements(i) > elements(i-1) + 1) .or. &
	     (element_pe_locs(i) /= element_pe_locs(i-1))) then ! new segment
	   nsegs = nsegs + 1
	endif

     endif ! if(i == 1) block

  end do  ! do i=1,num_elements

  allocate(seg_starts(nsegs), seg_lengths(nsegs), seg_pe_locs(nsegs), &
           stat=ierr)

  if(ierr /= 0) then
     call die(myname_,'allocate(seg_starts...',ierr)
  endif

       ! Second pass:  fill in segment data.

       ! NOTE: Structure of this loop was changed from a for loop
       ! to avoid a faulty vectorization on the SUPER-UX compiler

  i=1
  ASSIGN_LOOP: do

     if(i == 1) then  ! bootstrap first segment info.

        iseg = 1
        seg_starts(iseg) = 1
        seg_lengths(iseg) = 1
        seg_pe_locs(iseg) = element_pe_locs(iseg)

     else ! do usual point/segment processing

	! New segment?  This happens if 1) elements(i) > elements(i-1) + 1, or
	! 2) element_pe_locs(i) /= element_pe_locs(i-1).
     
	if((elements(i) > elements(i-1) + 1) .or. &
	     (element_pe_locs(i) /= element_pe_locs(i-1))) then ! new segment

	   ! Initialize new segment
	   iseg = iseg + 1
	   seg_starts(iseg) = i
	   seg_lengths(iseg) = 1
	   seg_pe_locs(iseg) = element_pe_locs(i)

	else

	   ! Increment current segment length
	   seg_lengths(iseg) = seg_lengths(iseg) + 1

	endif ! If new segment block

     endif ! if(i == 1) block

     ! Prepare index i for the next loop around; 
     if(i>=num_elements) EXIT
     i = i + 1

  end do ASSIGN_LOOP 

  if(iseg /= nsegs) then
     call die(myname_,'segment number difference',iseg-nsegs)
  endif

 end subroutine ComputeSegments_

 end module m_SparseMatrixDecomp
